load library and data
GSEA_get_ranks <- function(res_input) {
res_GSEA <- res_input %>%
dplyr::select(SYMBOL, logFC_MMSE) %>%
na.omit() %>% # remove NA
distinct() %>% # remove duplicated
group_by(SYMBOL) %>%
summarize(logFC_MMSE = mean(logFC_MMSE))
deframe(res_GSEA)
}
res_names <- DGE_list %>% names()
GSEA_ranks <- vector(mode = "list", length = length(res_names))
for (i in 1:length(GSEA_ranks)) {
GSEA_ranks[[i]] <- GSEA_get_ranks(DGE_list[[i]])
}
names(GSEA_ranks) <- res_names
# GSEA_ranks %>% lapply(function(x){x %>% head(n= 1000 ) %>% tail(n=30)})
# running all GSEA using hallmark genesets
gsea_output_HM <- vector(mode = "list", length = length(GSEA_ranks))
for (i in 1:length(GSEA_ranks)) {
gsea_output_HM[[i]] <- GSEA(sort(GSEA_ranks[[i]], decreasing = TRUE), TERM2GENE = h_gene_sets, pvalueCutoff = 1.0, eps = 0)
}
names(gsea_output_HM) <- res_names
gsea_df_HM <- gsea_output_HM %>% lapply(function(x) {
x %>% as_tibble()
})
# %>% filter(padj < 0.2) %>% arrange(desc(NES))
gsea_df_HM$`CDK2i200mpkQD_vs_control` %>% datatable(caption = "CDK2i200mpkQD_vs_control")
gsea_df_HM$`CDK2i100mpkBID_vs_control` %>% datatable(caption = "CDK2i100mpkBID_vs_control")
# running all GSEA using all the msigDB gene-sets of interest
gsea_output <- vector(mode = "list", length = length(GSEA_ranks))
for (i in 1:length(GSEA_ranks)) {
gsea_output[[i]] <- GSEA(sort(GSEA_ranks[[i]], decreasing = TRUE), TERM2GENE = MSigDB.ofinterest, pvalueCutoff = 1.0, eps = 0)
}
names(gsea_output) <- res_names
saveRDS(gsea_output, "/researchers/krutika.ambani/Goel_lab_members/Cath_Dietrich/20230704_OV5398PDX_OVCAR3_ARC_Veh_RNA/R_analysis/OV598PDX/4.GSEA/data/rds/gesea_output.rds")
gsea_output <- readRDS("/researchers/krutika.ambani/Goel_lab_members/Cath_Dietrich/20230704_OV5398PDX_OVCAR3_ARC_Veh_RNA/R_analysis/OV598PDX/4.GSEA/data/rds/gesea_output.rds")
gsea_df <- gsea_output %>% lapply(function(x) {
x %>% as_tibble()
})
gsea_df$`CDK2i200mpkQD_vs_control` %>% datatable(caption = "CDK2i200mpkQD_vs_control")
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
gsea_df$`CDK2i100mpkBID_vs_control` %>% datatable(caption = "CDK2i100mpkBID_vs_control")
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
plots - all genesets
combined_genesets_up <- gsea_df %>%
lapply(function(x) {
x %>%
filter(p.adjust < 0.05 & NES > 0) %>%
arrange(desc(NES)) %>%
.$ID %>%
head(n = 10)
}) %>%
unlist() %>%
unique()
combined_genesets_down <- gsea_df %>%
lapply(function(x) {
x %>%
filter(p.adjust < 0.05 & NES < 0) %>%
arrange(NES) %>%
.$ID %>%
head(n = 10)
}) %>%
unlist() %>%
unique()
combined_genesets <- c(combined_genesets_up, combined_genesets_down) %>% unique()
GSEA_names <- gsub("res_", "", names(gsea_df))
GSEA_ggplot <- data.frame()
for (i in 1:length(GSEA_names)) {
temp <- gsea_df[[i]] %>%
mutate(group = GSEA_names[i]) %>%
dplyr::select(-leading_edge, -core_enrichment) %>%
filter(ID %in% combined_genesets)
GSEA_ggplot <- rbind(GSEA_ggplot, temp)
}
GSEA_ggplot$ID <- GSEA_ggplot$ID %>% factor(levels = combined_genesets)
GSEA_ggplot$group <- factor(GSEA_ggplot$group, levels = GSEA_names)

## Warning: Removed 5 rows containing missing values (`geom_point()`).

gseaplot(gsea_output$CDK2i200mpkQD_vs_control, geneSetID = c("SASP_Demaria"), title = "CDK2i200mpkQD_vs_control", pvalue_table = TRUE, base_size = 14)

gseaplot2(gsea_output$CDK2i200mpkQD_vs_control, geneSetID = c("SASP_Demaria"), title = "CDK2i200mpkQD_vs_control", color = c("#E495A5", "#86B875"), pvalue_table = TRUE, base_size = 14)

gseaplot2(gsea_output$CDK2i100mpkBID_vs_control, geneSetID = c("SASP_Demaria"), title = "CDK2i100mpkBID_vs_control", color = c("#E495A5", "#86B875"), pvalue_table = TRUE, base_size = 14)

plots - hallmark genesets
combined_genesets_up <- gsea_df_HM %>%
lapply(function(x) {
x %>%
filter(p.adjust < 0.05 & NES > 0) %>%
arrange(desc(NES)) %>%
.$ID %>%
head(n = 10)
}) %>%
unlist() %>%
unique()
combined_genesets_down <- gsea_df_HM %>%
lapply(function(x) {
x %>%
filter(p.adjust < 0.05 & NES < 0) %>%
arrange(NES) %>%
.$ID %>%
head(n = 10)
}) %>%
unlist() %>%
unique()
combined_genesets <- c(combined_genesets_up, combined_genesets_down) %>% unique()
GSEA_names <- gsub("res_", "", names(gsea_df_HM))
GSEA_ggplot <- data.frame()
for (i in 1:length(GSEA_names)) {
temp <- gsea_df_HM[[i]] %>%
mutate(group = GSEA_names[i]) %>%
dplyr::select(-leading_edge, -core_enrichment) %>%
filter(ID %in% combined_genesets)
GSEA_ggplot <- rbind(GSEA_ggplot, temp)
}
GSEA_ggplot$ID <- GSEA_ggplot$ID %>% factor(levels = combined_genesets)
GSEA_ggplot$group <- factor(GSEA_ggplot$group, levels = GSEA_names)
# GSEA_ggplot$NES[GSEA_ggplot$p.adjust > 0.05] <- NA
GSEA_ggplot$NES %>% range()
# decided not to use this but the heatmap instead
GSEA_ggplot %>%
ggplot(aes(x = group, y = ID, fill = NES)) +
scale_fill_gradientn(colours = c("dodgerblue3", "white", "orange"), limits = c(-3, 3)) +
geom_tile() +
geom_tile(data = filter(GSEA_ggplot, p.adjust > 0.05), fill = "white") +
geom_text(aes(label = sprintf("%0.2f", NES))) +
theme_bw() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 12),
axis.text.y = element_text(color = "black", size = 10),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
legend.position = "bottom"
) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

GSEA_ggplot$NES %>% range()
GSEA_ggplot$p.adjust[GSEA_ggplot$p.adjust > 0.05] <- NA
# decided not to use this but the heatmap instead
GSEA_ggplot %>%
ggplot(aes(x = group, y = ID, col = NES)) +
# scale_fill_gradientn(colours = c("dodgerblue3","white", "orange"), limits = c(-4,4)) +
scale_color_gradientn(colours = c("dodgerblue3", "white", "orange")) +
geom_point(aes(size = -log10(p.adjust))) +
theme_bw() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 12),
axis.text.y = element_text(color = "black", size = 10),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
legend.position = "right"
)
## Warning: Removed 5 rows containing missing values (`geom_point()`).

gseaplot(gsea_output_HM$CDK2i200mpkQD_vs_control, geneSetID = c("HALLMARK_E2F_TARGETS"), title = "CDK2i200mpkQD_vs_control", pvalue_table = TRUE, base_size = 14)

gseaplot2(gsea_output_HM$CDK2i200mpkQD_vs_control, geneSetID = c("HALLMARK_E2F_TARGETS"), title = "CDK2i200mpkQD_vs_control", color = c("#E495A5", "#86B875"), pvalue_table = TRUE, base_size = 14)

gseaplot2(gsea_output_HM$CDK2i100mpkBID_vs_control, geneSetID = c("HALLMARK_E2F_TARGETS"), title = "CDK2i100mpkBID_vs_control", color = c("#E495A5", "#86B875"), pvalue_table = TRUE, base_size = 14)
